# NumPy
import numpy as np
# Pandas
import pandas as pd
# Matplotlib
import matplotlib.pyplot as plt
# BeautifulSoup
from bs4 import BeautifulSoup
# Request
import requests
# RegEX
import re
# IPython shell escape (the leading "!" is only valid inside Jupyter/IPython,
# not in a plain Python script): runs nbconvert to export a notebook to HTML.
# NOTE(review): the notebook named here is a customer-segmentation notebook,
# which does not match the Academy Award scrape below -- looks like a leftover
# from another project; confirm the intended file name.
!jupyter nbconvert --to html customer-segmentation-using-clustering.ipynb
# Download the Wikipedia list of Academy Award-winning films and parse it.
url = "https://en.wikipedia.org/wiki/List_of_Academy_Award-winning_films"
# A timeout keeps the request from blocking forever if the server never answers.
req = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page.
req.raise_for_status()
# Show the response object (status code) when run as a notebook cell.
req
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so results could differ between machines.
soup = BeautifulSoup(req.content, "html.parser")
# Walk every <td> cell of the parsed page and deal the cleaned cell text
# round-robin into four columns: Film, Year, Award, Nomination.
Film = []
Year = []
Award = []
Nomination = []

# Markup to strip from the serialized cell: an opening "<td>...">" prefix,
# bare <td>/</td> tags, any remaining "<...>" span, and newlines.
# Compiled once here instead of being re-submitted to re.sub on every cell.
_CELL_MARKUP = re.compile(r'^<td>.*">|<td>|</td>|<.*>|\n')

# Order matters: the n-th cell goes to column n modulo 4.
_columns = (Film, Year, Award, Nomination)

# find_all is the supported spelling; findAll is a deprecated alias.
for _position, _cell in enumerate(soup.find_all('td')):
    _columns[_position % len(_columns)].append(_CELL_MARKUP.sub("", str(_cell)))
# Assemble the four scraped columns into a DataFrame and preview it.
# Upper bound on the number of rows kept from each column.
# NOTE(review): presumably 1332 is where the films table ends and cells from
# other page elements begin -- confirm against the current page.
MAX_ROWS = 1332

# pd.DataFrame raises ValueError when the column lists differ in length, which
# happens if any list holds fewer than MAX_ROWS items (e.g. the page changed or
# the cell count is not a multiple of four). Truncate to the shortest list.
_row_count = min(MAX_ROWS, len(Film), len(Year), len(Award), len(Nomination))

df = pd.DataFrame(
    {
        "Film": Film[:_row_count],
        "Years": Year[:_row_count],
        "Awards": Award[:_row_count],
        "Nominations": Nomination[:_row_count],
    }
)
# Notebook-style previews: whole frame, then first and last ten rows.
df
df.head(10)
df.tail(10)